# -*- coding: utf-8 -*-

import os
import pandas as pd
import random
import pdfplumber

def pdf_to_txt(pdf_path, text_path) -> None:
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Pages for which ``extract_text()`` yields nothing are skipped. Each
    page's text is terminated with a newline so that the last line of one
    page is not fused with the first line of the next page.

    Failures are reported on stdout and never raised, so a caller that
    loops over many files can carry on with the next one.

    Args:
        pdf_path: Path of the PDF file to read.
        text_path: Path of the text file to write (opened in "w" mode, so
            an existing file is overwritten).
    """
    try:
        with pdfplumber.open(pdf_path) as pdf, \
                open(text_path, "w", encoding="utf-8") as f:
            for page in pdf.pages:
                text = page.extract_text()
                if not text:
                    continue
                f.write(text)
                # Keep a line break between consecutive pages.
                if not text.endswith("\n"):
                    f.write("\n")
    except Exception as e:
        print("pdf文件下载失败！")
        # Say which file failed and why; the bare status line alone gives
        # no way to diagnose the problem.
        print(f"Failed to extract text from {pdf_path}: {e!r}")
    else:
        print(f"Text extracted to: {text_path}")
        print("pdf文件下载成功！")

if __name__ == "__main__":
    # Convert every PDF found under folder_path (sub-folders included) into
    # a text file of the same (lower-cased) base name in txt_folder_path.
    folder_path = "/home/ubuntu/code/git/subject-word-extraction/data/buchong/pdf"
    txt_folder_path = "/home/ubuntu/code/git/subject-word-extraction/data/buchong/txt"

    # Without the output folder every conversion would fail when opening
    # the target file.
    os.makedirs(txt_folder_path, exist_ok=True)

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            # splitext keeps dots inside the name ("a.b.pdf" -> "a.b"), so
            # different PDFs no longer collapse onto the same output file.
            file_name, extension = os.path.splitext(file.lower())
            if extension != ".pdf":
                continue
            try:
                print(f"\n{file_name}")
                # Join with `root`, not `folder_path`: os.walk also yields
                # files that live in sub-folders.
                pdf_file_path = os.path.join(root, file)
                txt_file_path = os.path.join(txt_folder_path, f"{file_name}.txt")
                pdf_to_txt(pdf_file_path, txt_file_path)
            except Exception as e:
                # Best effort: report and continue with the next file.
                print(e)
